#############################################################################################
#############################################################################################
####Appendix: Non-state Atrocities in Capital Cities - Capital Cities and Regime Survival####
#############################################################################################
#############################################################################################
library(survival)
library(Zelig)
library(mvtnorm)
library(stargazer)
library(MASS)

# Set working library
setwd("~/Data/Regime Transition/")

####################################
####################################
###Regime Transition and Survival###
####################################
####################################

###read in the data
dur.dat <- read.csv("CTgeddat.csv")
dur.dat$lngdp <- log(dur.dat$wdi_gdpc+1)
dur.dat$lnlaggdp <- log(dur.dat$lag.wdi_gdpc+1)
dur.dat$lnunna_pop <- log(dur.dat$unna_pop  +1)
dur.dat$lnlagunna_pop <- log(dur.dat$lag.unna_pop+1)
dur.dat$lnCap.At.sum <- log(dur.dat$Cap.At.sum +1)
dur.dat$c.at.bin <- ifelse(dur.dat$Cap.At.Bin.sum>0,1,0)
dur.dat$ln.lag.ross_oil_prod<- log(dur.dat$lag.ross_oil_prod+1)

#Remove years not analyzed in the main paper
dur.dat <- subset(dur.dat, (dur.dat$year>1995 & dur.dat$year<2010))

####Survival analysis 
#Baseline model
sur.1 <- coxph(Surv(dur.dat$gwf_duration, dur.dat$gwf_fail, type=c('right')) ~  lnCap.At.sum + lagpolity2 + 
  lagincidentnonstatefull.sum + conf + lnlaggdp + lnlagunna_pop, data = dur.dat)
summary(sur.1)
AIC(sur.1)

#Medium model
sur.2 <- coxph(Surv(dur.dat$gwf_duration, dur.dat$gwf_fail, type=c('right')) ~  lnCap.At.sum + lagpolity2 + 
                 lagincidentnonstatefull.sum + conf + lnlaggdp + lnlagunna_pop + ln.lag.ross_oil_prod +
                 ln.lag.milex, data = dur.dat)
summary(sur.2)
AIC(sur.2)

#Full model
sur.3 <- coxph(Surv(dur.dat$gwf_duration, dur.dat$gwf_fail, type=c('right')) ~  lnCap.At.sum + lagpolity2 + 
                 lagincidentnonstatefull.sum + conf + lnlaggdp + lnlagunna_pop + ln.lag.ross_oil_prod +
                 ln.lag.milex + mnt1 + lag.ciri_empinx_new, data = dur.dat)
summary(sur.3)
AIC(sur.3)

#Latex
stargazer(sur.1, sur.2, sur.3)
AIC(sur.1, sur.2, sur.3)

#######################
#######################
###Regime Durability###
#######################
#######################

#Import data
count.dat <- read.csv("CTbasedat.csv")
count.dat$lngdp <- log(count.dat$wdi_gdpc+1)
count.dat$lnlaggdp <- log(count.dat$lag.wdi_gdpc+1)
count.dat$lnunna_pop <- log(count.dat$unna_pop  +1)
count.dat$lnlagunna_pop <- log(count.dat$lag.unna_pop+1)
count.dat$lnCap.At.sum <- log(count.dat$Cap.At.sum +1)
count.dat$c.at.bin <- ifelse(count.dat$Cap.At.Bin.sum>0,1,0)
count.dat$ln.lag.ross_oil_prod<- log(count.dat$lag.ross_oil_prod+1)

#Create fixed year dummies
count.dat$year95 <- ifelse(count.dat$year=="1995", 1 , 0)
count.dat$year96 <- ifelse(count.dat$year=="1996", 1 , 0)
count.dat$year97 <- ifelse(count.dat$year=="1997", 1 , 0)
count.dat$year98 <- ifelse(count.dat$year=="1998", 1 , 0)
count.dat$year99 <- ifelse(count.dat$year=="1999", 1 , 0)
count.dat$year00 <- ifelse(count.dat$year=="2000", 1 , 0)
count.dat$year01 <- ifelse(count.dat$year=="2001", 1 , 0)
count.dat$year02 <- ifelse(count.dat$year=="2002", 1 , 0)
count.dat$year03 <- ifelse(count.dat$year=="2003", 1 , 0)
count.dat$year04 <- ifelse(count.dat$year=="2004", 1 , 0)
count.dat$year05 <- ifelse(count.dat$year=="2005", 1 , 0)
count.dat$year06 <- ifelse(count.dat$year=="2006", 1 , 0)
count.dat$year07 <- ifelse(count.dat$year=="2007", 1 , 0)
count.dat$year08 <- ifelse(count.dat$year=="2008", 1 , 0)
count.dat$year09 <- ifelse(count.dat$year=="2009", 1 , 0)
count.dat$year10 <- ifelse(count.dat$year=="2010", 1 , 0)

#Remove years not analyzed in the main paper
count.dat <- subset(count.dat, (count.dat$year>1995 & count.dat$year<2010))

#Baseline model
s3.nb.1 <- glm.nb(p_durable  ~ lnCap.At.sum + lagpolity2 + 
                 lagincidentnonstatefull.sum + conf + lnlaggdp + lnlagunna_pop + cluster(ccode) +
                 year96 + year97+ year98 + year99 + year00 + year01 + year02 + year03 + 
                 year04 + year05 + year06 + year07 + year08 + year09,
               data=count.dat)
summary(s3.nb.1)
AIC(s3.nb.1)

#Medium model
s3.nb.2 <- glm.nb(p_durable  ~ lnCap.At.sum + lagpolity2 + 
                 lagincidentnonstatefull.sum + conf + lnlaggdp + lnlagunna_pop + ln.lag.ross_oil_prod +
                 ln.lag.milex + cluster(ccode) +
                 year96 + year97+ year98 + year99 + year00 + year01 + year02 + year03 + 
                 year04 + year05 + year06 + year07 + year08 + year09 + year10,
               data=count.dat)
summary(s3.nb.2)
AIC(s3.nb.2)

#Full model
s3.nb.3 <- glm.nb(p_durable  ~ lnCap.At.sum + lagpolity2 + 
                 lagincidentnonstatefull.sum + conf + lnlaggdp + lnlagunna_pop + ln.lag.ross_oil_prod +
                 ln.lag.milex + mnt1 + lag.ciri_empinx_new + cluster(ccode) +
                 year96 + year97+ year98 + year99 + year00 + year01 + year02 + year03 + 
                 year04 + year05 + year06 + year07,
               data=count.dat)
summary(s3.nb.3)
AIC(s3.nb.3)

#Latex
stargazer(s3.nb.1, s3.nb.2, s3.nb.3)

######################################
###Predicted probabilities: model 3###
######################################
###create fake data
log.3.dat <- na.omit(count.dat[,c("p_durable", "ccode", "lnCap.At.sum", "lagpolity2", "lagincidentnonstatefull.sum", "conf", "lnlaggdp", "lnlagunna_pop", "ln.lag.ross_oil_prod", "ln.lag.milex", "mnt1", "lag.ciri_empinx_new", "year95", "year96","year97","year98","year99","year00","year01","year02","year03","year04","year05","year06","year07","year08","year09", "year10")])
lnCap.At.sum.inf <- log.3.dat$lnCap.At.sum
lnCap.At.sum.inf[lnCap.At.sum.inf==0] <- NA
lnCap.At.sum.inf <- na.omit(lnCap.At.sum.inf)

lagpolity2 <- as.numeric(median(log.3.dat$lagpolity2))
lagincidentnonstatefull.sum <- as.numeric(median(log.3.dat$lagincidentnonstatefull.sum))
conf <- as.numeric(median(log.3.dat$conf))
lnlaggdp <- as.numeric(mean(log.3.dat$lnlaggdp))
lnlagunna_pop <- as.numeric(mean(log.3.dat$lnlagunna_pop))
ln.lag.ross_oil_prod <- as.numeric(mean(log.3.dat$ln.lag.ross_oil_prod))
ln.lag.milex <- as.numeric(mean(log.3.dat$ln.lag.milex))
mnt1 <- as.numeric(mean(log.3.dat$mnt1))
lag.ciri_empinx_new <- as.numeric(median(log.3.dat$lag.ciri_empinx_new))
clusterccode <- mean(cluster(log.3.dat$ccode))
year96 <- as.numeric(median(log.3.dat$year96))
year97 <- as.numeric(median(log.3.dat$year97))
year98 <- as.numeric(median(log.3.dat$year98))
year99 <- as.numeric(median(log.3.dat$year99))
year00 <- as.numeric(median(log.3.dat$year00))
year01 <- as.numeric(median(log.3.dat$year01))
year02 <- as.numeric(median(log.3.dat$year02))
year03 <- as.numeric(median(log.3.dat$year03))
year04 <- as.numeric(median(log.3.dat$year04))
year05 <- as.numeric(median(log.3.dat$year05))
year06 <- as.numeric(median(log.3.dat$year06))
year07 <- as.numeric(median(log.3.dat$year07))
###Set seed
set.seed(2)
#set sims  (number of simulations)
sims <- 1000
#create some storage matrices
p1<-NULL
p2<-NULL
all.expectedvalues1<-NULL
all.expectedvalues2<-NULL
First.Differences<-NULL
First.Diff.Per <- NULL
all.first.diff.per <- NULL
all.firstdifferences<-NULL
history<-matrix(0,nrow=1,ncol=3)
hist.per <- matrix(0,nrow=1,ncol=3)
#Set coefficients
c <- c(coef(s3.nb.3))
#start loop
for(j in 1:sims){ 
  #predicted#
  bsim<-rmvnorm(1, mean=c, sigma=vcov(s3.nb.3)) 
   xoutcome.1<-rbind(1, quantile(lnCap.At.sum.inf, 0.25),lagpolity2, lagincidentnonstatefull.sum, conf, lnlaggdp, lnlagunna_pop, ln.lag.ross_oil_prod,
                      ln.lag.milex, mnt1, lag.ciri_empinx_new, clusterccode,
                      year96, year97, year98, year99, year00, year01, year02, year03, 
                      year04, year05, year06, year07)
  ####Creating the matrices
  equationcount<-bsim %*% xoutcome.1
  expectedvalue1<-exp(equationcount)
  ###First difference change prediction
  xoutcome.2<-rbind(1,quantile(lnCap.At.sum.inf, 0.75),lagpolity2, lagincidentnonstatefull.sum, conf, lnlaggdp, lnlagunna_pop, ln.lag.ross_oil_prod,
                    ln.lag.milex, mnt1, lag.ciri_empinx_new, clusterccode,
                    year96, year97, year98, year99, year00, year01, year02, year03, 
                    year04, year05, year06, year07)
  ###Creating the matrices
  equationcount2<-bsim %*% xoutcome.2
  expectedvalue2<-exp(equationcount2)
  Firstdifference<-expectedvalue2-expectedvalue1       
  First.Diff.Per <- ((expectedvalue2-expectedvalue1)/expectedvalue1)*100
  all.firstdifferences<-c(all.firstdifferences, Firstdifference)
  all.first.diff.per<- c(all.first.diff.per, First.Diff.Per)
  ###end loop
}
###Create summary
sorted.firstdifferences<-sort(all.firstdifferences)
history[1,1]<-mean(sorted.firstdifferences)
history[1,2]<-sorted.firstdifferences[round(0.95*length(sorted.firstdifferences))]
history[1,3]<-sorted.firstdifferences[round(0.05*length(sorted.firstdifferences))]
###Percent summary
sorted.first.diff.per<-sort(all.first.diff.per)
hist.per[1,1]<- mean(sorted.first.diff.per)
hist.per[1,2] <- sorted.first.diff.per[round(0.95*length(sorted.first.diff.per))]
hist.per[1,3] <- sorted.first.diff.per[round(0.05*length(sorted.first.diff.per))]
###end final loop
#}
history
hist.per


#Same loop, only this time moving from 0 to 95% of capital atrocities
#start loop
for(j in 1:sims){ 
  #predicted#
  bsim<-rmvnorm(1, mean=c, sigma=vcov(s3.nb.3)) 
  xoutcome.1<-rbind(1, 0,lagpolity2, lagincidentnonstatefull.sum, conf, lnlaggdp, lnlagunna_pop, ln.lag.ross_oil_prod,
                    ln.lag.milex, mnt1, lag.ciri_empinx_new, clusterccode,
                    year96, year97, year98, year99, year00, year01, year02, year03, 
                    year04, year05, year06, year07)
  ####Creating the matrices
  equationcount<-bsim %*% xoutcome.1
  expectedvalue1<-exp(equationcount)
  ###First difference change prediction
  xoutcome.2<-rbind(1,quantile(log.3.dat$lnCap.At.sum, 0.95),lagpolity2, lagincidentnonstatefull.sum, conf, lnlaggdp, lnlagunna_pop, ln.lag.ross_oil_prod,
                    ln.lag.milex, mnt1, lag.ciri_empinx_new, clusterccode,
                    year96, year97, year98, year99, year00, year01, year02, year03, 
                    year04, year05, year06, year07)
  ###Creating the matrices
  equationcount2<-bsim %*% xoutcome.2
  expectedvalue2<-exp(equationcount2)
  Firstdifference<-expectedvalue2-expectedvalue1       
  First.Diff.Per <- ((expectedvalue2-expectedvalue1)/expectedvalue1)*100
  all.firstdifferences<-c(all.firstdifferences, Firstdifference)
  all.first.diff.per<- c(all.first.diff.per, First.Diff.Per)
  ###end loop
}
###Create summary
sorted.firstdifferences<-sort(all.firstdifferences)
history[1,1]<-mean(sorted.firstdifferences)
history[1,2]<-sorted.firstdifferences[round(0.95*length(sorted.firstdifferences))]
history[1,3]<-sorted.firstdifferences[round(0.05*length(sorted.firstdifferences))]
###Percent summary
sorted.first.diff.per<-sort(all.first.diff.per)
hist.per[1,1]<- mean(sorted.first.diff.per)
hist.per[1,2] <- sorted.first.diff.per[round(0.95*length(sorted.first.diff.per))]
hist.per[1,3] <- sorted.first.diff.per[round(0.05*length(sorted.first.diff.per))]
###end final loop
#}
history
hist.per

